package org.example.nebula.basic

import com.vesoft.nebula.connector.connector.NebulaDataFrameReader
import com.vesoft.nebula.connector.{NebulaConnectionConfig, ReadNebulaConfig}
import org.apache.spark.sql.{DataFrame, SparkSession}

object ReadData {

  /** Reads a local CSV file (with header row, comma-delimited) into a DataFrame. */
  def readCsvData(spark: SparkSession): DataFrame =
    spark.read
      .options(Map("header" -> "true", "delimiter" -> ","))
      .csv("data/nebula/data.csv")

  /** Reads a CSV file from HDFS (with header row, comma-delimited) into a DataFrame. */
  def readStringCsvData(spark: SparkSession): DataFrame = {
    val reader = spark.read
      .option("header", true)
      .option("delimiter", ",")
    reader.csv("hdfs://nameservice1/test/data_140.csv")
  }

  /** Reads edge data from a NebulaGraph database into a DataFrame.
    *
    * The defaults reproduce the original hard-coded values, so existing
    * callers are unaffected; override them to target a different cluster,
    * space, or edge type.
    *
    * @param spark        active SparkSession used to build the reader
    * @param metaAddress  NebulaGraph metad address, "host:port"
    * @param space        graph space to read from
    * @param label        edge type (label) to scan
    * @param limit        number of rows fetched per scan request
    * @param partitionNum number of Spark partitions for the scan
    * @return DataFrame of the scanned edges
    */
  def readNebulaGraphData(
      spark: SparkSession,
      metaAddress: String = "172.16.7.114:9559",
      space: String = "demo",
      label: String = "RelationLocationPerson",
      limit: Int = 20,
      partitionNum: Int = 100
  ): DataFrame = {
    val config = NebulaConnectionConfig
      .builder()
      .withMetaAddress(metaAddress)
      .withTimeout(6000)
      // NOTE: "Conenction" is misspelled in the connector API itself; do not "fix" it.
      .withConenctionRetry(2)
      .build()

    val nebulaReadEdgeConfig: ReadNebulaConfig = ReadNebulaConfig
      .builder()
      .withSpace(space)
      .withLabel(label)
      // true => return no property columns, only the edge endpoints/ranking
      .withNoColumn(true)
      .withLimit(limit)
      .withPartitionNum(partitionNum)
      .build()

    spark.read.nebula(config, nebulaReadEdgeConfig).loadEdgesToDF()
  }

}
